HOMEWORK 1

Setting Up

Loading Packages

# Loading Packages 

library(data.table)
library(lubridate)
library(tidyverse)
library(esquisse)
library(plyr)
library(ggplot2)
library(cowplot)
library(naniar) #for NA exploration
library(sp) #spatial data
library(ggmap) #for map
library(osmdata) #openstreetmap
library(reshape2)
library(plotly)

Loading Data and Cleaning

# Read the laptop sales extract with data.table's fread().
Laptop_Sales_Data <- fread(input = "DATA/LaptopSales_red.csv")

# Check that the result is a data.table, then print per-column summaries.
is.data.table(x = Laptop_Sales_Data)
## [1] TRUE
summary(object = Laptop_Sales_Data)
##        V1             Date           Configuration   Customer.Postcode 
##  Min.   :     2   Length:148786      Min.   :  1.0   Length:148786     
##  1st Qu.: 74275   Class :character   1st Qu.:192.0   Class :character  
##  Median :148450   Mode  :character   Median :347.0   Mode  :character  
##  Mean   :148742                      Mean   :379.7                     
##  3rd Qu.:223162                      3rd Qu.:576.0                     
##  Max.   :297572                      Max.   :864.0                     
##                                                                        
##  Store.Postcode      Retail.Price   Screen.Size..Inches. Battery.Life..Hours.
##  Length:148786      Min.   :168.0   Min.   :15.00        Min.   :4.000       
##  Class :character   1st Qu.:440.0   1st Qu.:15.00        1st Qu.:4.000       
##  Mode  :character   Median :500.0   Median :15.00        Median :5.000       
##                     Mean   :508.1   Mean   :15.81        Mean   :4.973       
##                     3rd Qu.:575.0   3rd Qu.:17.00        3rd Qu.:6.000       
##                     Max.   :890.0   Max.   :17.00        Max.   :6.000       
##                     NA's   :6656                                             
##     RAM..GB.     Processor.Speeds..GHz. Integrated.Wireless.  HD.Size..GB.  
##  Min.   :1.000   Min.   :1.50           Length:148786        Min.   : 40.0  
##  1st Qu.:1.000   1st Qu.:1.50           Class :character     1st Qu.: 40.0  
##  Median :2.000   Median :2.00           Mode  :character     Median : 80.0  
##  Mean   :2.101   Mean   :1.93                                Mean   :132.2  
##  3rd Qu.:2.000   3rd Qu.:2.40                                3rd Qu.:120.0  
##  Max.   :4.000   Max.   :2.40                                Max.   :300.0  
##                                                                             
##  Bundled.Applications.   customer.X       customer.Y        store.X      
##  Length:148786         Min.   :512253   Min.   :164886   Min.   :517917  
##  Class :character      1st Qu.:529098   1st Qu.:178716   1st Qu.:528924  
##  Mode  :character      Median :530928   Median :181083   Median :529902  
##                        Mean   :530748   Mean   :179890   Mean   :530644  
##                        3rd Qu.:533076   3rd Qu.:182060   3rd Qu.:534057  
##                        Max.   :549065   Max.   :199846   Max.   :541428  
##                                                          NA's   :85      
##     store.Y      
##  Min.   :168302  
##  1st Qu.:178440  
##  Median :179641  
##  Mean   :179757  
##  3rd Qu.:181567  
##  Max.   :190628  
##  NA's   :85
# Show the column types and the first few values of every column.
str(object = Laptop_Sales_Data)
## Classes 'data.table' and 'data.frame':   148786 obs. of  17 variables:
##  $ V1                    : int  171289 38634 260048 166045 243280 118859 249957 198058 198850 267007 ...
##  $ Date                  : chr  "9/20/2008 2:49" "5/30/2008 9:52" "12/10/2008 9:26" "9/15/2008 9:41" ...
##  $ Configuration         : int  528 307 235 168 517 738 301 301 479 472 ...
##  $ Customer.Postcode     : chr  "NW5 1SP" "N6 6BU" "CR0 2BW" "WC2H 9PS" ...
##  $ Store.Postcode        : chr  "N3 1DH" "N3 1DH" "CR7 8LE" "SW1P 3AU" ...
##  $ Retail.Price          : int  413 515 315 NA 580 535 455 465 600 392 ...
##  $ Screen.Size..Inches.  : int  17 15 15 15 17 17 15 15 17 17 ...
##  $ Battery.Life..Hours.  : int  4 6 5 5 4 6 6 6 4 4 ...
##  $ RAM..GB.              : int  2 1 2 1 2 1 1 1 1 1 ...
##  $ Processor.Speeds..GHz.: num  2.4 2 2.4 2 2.4 2 1.5 1.5 2.4 2.4 ...
##  $ Integrated.Wireless.  : chr  "No" "Yes" "No" "Yes" ...
##  $ HD.Size..GB.          : int  300 80 80 300 120 40 120 120 300 300 ...
##  $ Bundled.Applications. : chr  "No" "Yes" "Yes" "No" ...
##  $ customer.X            : int  528771 528281 532781 530190 537350 532498 533130 529390 533998 532498 ...
##  $ customer.Y            : int  186041 187336 166444 181139 169306 168334 182489 181270 168421 168334 ...
##  $ store.X               : int  525109 525109 532714 529902 528739 528739 534057 528924 528739 532714 ...
##  $ store.Y               : int  190628 190628 168302 179641 173080 173080 179682 178440 173080 168302 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Plot the share of missing values per variable (percentages, not counts).
gg_miss_var(x = Laptop_Sales_Data, show_pct = TRUE)

EX 3.4

a. Price Question

#### Price/date subset with incomplete rows removed

# Keep only the price and the sale timestamp, parsing the "m/d/Y H:M" text
# into a date-time in the same step, then drop every row that has an NA.
Retail_Price_and_Dates <- na.omit(
  Laptop_Sales_Data[, .(Retail.Price, Date = mdy_hm(Date))]
)

i.

#### Histogram of the Retail Price of Computer In 2008

# Distribution of individual sale prices (30 bins), rendered as an
# interactive plotly widget. The sale dates in the data are from 2008,
# so the labels say 2008 rather than 2018.
ggplotly(
  ggplot(Retail_Price_and_Dates, aes(x = Retail.Price)) +
    geom_histogram(bins = 30L, fill = "#1c6155") +
    labs(
      x = "Price",
      y = "Frequency",
      title = "Histogram of the Retail Price of Computer",
      subtitle = "In 2008"
    ) +
    theme_minimal()
)
#### Boxplot of the Retail Price of Computer In 2008

# Single boxplot of all sale prices; the empty string on x gives one
# unlabeled group. The sale dates in the data are from 2008, so the labels
# say 2008 rather than 2018.
ggplotly(
  ggplot(Retail_Price_and_Dates, aes(x = "", y = Retail.Price)) +
    geom_boxplot(fill = "#1c6155") +
    labs(
      y = "Price",
      title = "Boxplot of the Retail Price of Computer",
      subtitle = "In 2008"
    ) +
    theme_minimal()
)
# Actual price: the price(s) recorded at the most recent sale timestamp.

Max_Date_Retail <- max(Retail_Price_and_Dates$Date)

Actual_Price <- Retail_Price_and_Dates[Date %in% Max_Date_Retail, ]

# Build the message from every row that shares the latest timestamp instead
# of hard-coding rows 1 and 2: with exactly two rows the text is unchanged,
# a single row no longer prints "NA USD", and extra rows are not dropped.
# NOTE(review): the store postcodes look like London ones -- confirm that
# "USD" is the right currency label.
print(paste(
  "Last Recorded Prices are",
  paste(Actual_Price$Retail.Price, "USD", collapse = " and "),
  "on the same Day"
))
## [1] "Last Recorded Prices are 406 USD and 530 USD on the same Day"

ii.

# Mean retail price per calendar month, week and day.
# The grouping column is explicitly named `floor_date` (the name the plots
# below map to the x axis) and the mean is named directly in `j`, so no
# rename step is needed afterwards.
Retail_Price_and_Dates_Month <- Retail_Price_and_Dates[
  , .(Mean_Retail_Price = mean(Retail.Price)),
  by = .(floor_date = floor_date(Date, unit = "month"))
]

Retail_Price_and_Dates_Week <- Retail_Price_and_Dates[
  , .(Mean_Retail_Price = mean(Retail.Price)),
  by = .(floor_date = floor_date(Date, unit = "week"))
]

Retail_Price_and_Dates_Day <- Retail_Price_and_Dates[
  , .(Mean_Retail_Price = mean(Retail.Price)),
  by = .(floor_date = floor_date(Date, unit = "day"))
]

# Retail Price By Month
# Line chart of the monthly mean price. The sale dates in the data are from
# 2008, so the title says 2008 rather than 2018.
ggplotly(
  ggplot(
    Retail_Price_and_Dates_Month,
    aes(x = floor_date, y = Mean_Retail_Price)
  ) +
    geom_line(size = 1.1, colour = "#112446") +
    labs(
      x = "Month",
      y = "Price",
      title = "Retail Price of Computer in 2008",
      subtitle = "Aggregated by Month"
    ) +
    theme_classic()
)
# Retail Price By Week
# Line chart of the weekly mean price. The sale dates in the data are from
# 2008, so the title says 2008 rather than 2018.
ggplotly(
  ggplot(
    Retail_Price_and_Dates_Week,
    aes(x = floor_date, y = Mean_Retail_Price)
  ) +
    geom_line(size = 0.4, colour = "#112446") +
    labs(
      x = "Week",
      y = "Price",
      title = "Retail Price of Computer in 2008",
      subtitle = "Aggregated by Week"
    ) +
    theme_classic()
)
# Retail Price By Day
# Line chart of the daily mean price. The sale dates in the data are from
# 2008, so the title says 2008 rather than 2018.
ggplotly(
  ggplot(
    Retail_Price_and_Dates_Day,
    aes(x = floor_date, y = Mean_Retail_Price)
  ) +
    geom_line(size = 0.2, colour = "#112446") +
    labs(
      x = "Day",
      y = "Price",
      title = "Retail Price of Computer in 2008",
      subtitle = "Aggregated by Day"
    ) +
    theme_classic()
)

iii.

#### Subsets for the per-store and per-configuration views (NA rows removed)

# Price, store and parsed sale timestamp; rows with any NA are dropped.
Retail_Price_Outlets_Date <- na.omit(
  Laptop_Sales_Data[, .(Retail.Price, Store.Postcode, Date = mdy_hm(Date))]
)

# Price together with the configuration id and its hardware/software
# attributes; rows with any NA are dropped.
Retail_Price_Configuration <- na.omit(
  Laptop_Sales_Data[
    ,
    c(
      "Retail.Price",
      "Configuration",
      "Screen.Size..Inches.",
      "Battery.Life..Hours.",
      "RAM..GB.",
      "Processor.Speeds..GHz.",
      "Integrated.Wireless.",
      "HD.Size..GB.",
      "Bundled.Applications."
    ),
    with = FALSE
  ]
)
#### Price Decomposed by Retail Outlets

#### Boxplot Across Retail Outlets

# One price boxplot per store postcode, with the x labels rotated 90 degrees.
# The sale dates in the data are from 2008, so the subtitle says 2008 rather
# than 2018.
ggplotly(
  ggplot(Retail_Price_Outlets_Date, aes(x = Store.Postcode, y = Retail.Price)) +
    geom_boxplot(fill = "#112446") +
    labs(
      x = "Stores Postcode",
      y = "Price",
      title = "Boxplot Of The Retail Price Across Stores",
      subtitle = "In 2008"
    ) +
    theme_classic() +
    scale_x_discrete(guide = guide_axis(n.dodge = 1)) +
    theme(axis.text.x = element_text(size = rel(1), angle = 90))
)
#### Monthly Mean Retail Price per Store

# The first step adds Floor.Date (the month start of each sale) to
# Retail_Price_Outlets_Date by reference, so that table keeps the new column.
# The second step averages the price per store and month, naming the result
# column directly instead of renaming it afterwards.
Retail_Price_Outlets_Date_Month <- Retail_Price_Outlets_Date[
  , Floor.Date := floor_date(Date, unit = "month")
][
  , .(Mean_Retail_Price = mean(Retail.Price)),
  by = .(Store.Postcode, Floor.Date)
]

#### Plot of the Monthly Retail Price per Stores

# One line per store showing its monthly mean price. The sale dates in the
# data are from 2008, so the subtitle says 2008 rather than 2018.
ggplotly(
  ggplot(
    Retail_Price_Outlets_Date_Month,
    aes(x = Floor.Date, y = Mean_Retail_Price, colour = Store.Postcode)
  ) +
    geom_line(size = 0.5) +
    scale_color_hue(direction = 1) +
    labs(
      x = "Month",
      y = "Price",
      title = "Retail Price Across Months and Grouped by Stores",
      subtitle = "In 2008"
    ) +
    theme_classic()
)
#### Plot Of The Retail Price per Configuration

# Each sale as a point: configuration id on x, price on y, coloured by
# battery life. The sale dates in the data are from 2008, so the subtitle
# says 2008 rather than 2018.
ggplotly(
  ggplot(
    Retail_Price_Configuration,
    aes(x = Configuration, y = Retail.Price, colour = Battery.Life..Hours.)
  ) +
    geom_point(shape = "circle", size = 0.8) +
    scale_color_gradient() +
    labs(
      y = "Retail Price",
      title = "Retail Price and Configuration ",
      subtitle = "In 2008"
    ) +
    theme_classic()
)

b. Location Questions

i.

ii.

# Second name for the sales table. Plain assignment does not copy a
# data.table, so both names refer to the same object.
Laptop_Sales_Data_2 <- Laptop_Sales_Data

# Number of sale rows per store postcode, in a column named N.
Sales_Stores <- Laptop_Sales_Data_2[, .(N = .N), by = .(Store.Postcode)]

#### Plot Most Sales from Stores

# Bar chart of sales counts, stores ordered from most to fewest sales, with
# the count written inside each bar. The sale dates in the data are from
# 2008, so the subtitle says 2008 rather than 2018.
ggplotly(
  ggplot(Sales_Stores, aes(x = reorder(Store.Postcode, -N), y = N)) +
    geom_col(fill = "#112446") +
    labs(
      x = "Stores",
      y = "Number Of Sales",
      title = "Number of Sales per Store",
      subtitle = "In 2008"
    ) +
    theme_classic() +
    geom_text(
      aes(label = N),
      vjust = 1.5, hjust = 1.1, colour = "white", angle = 90, size = 3
    ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)

iii.

c. Revenue Questions

i.

# Per-store sales volume and revenue.
# Volumes counts every sale row of the store (including rows whose price is
# NA), while Revenues sums only the non-missing prices.
# Because Configuration is listed in `j`, the result has one row per sale
# and the store totals are repeated on each of those rows.
Sales_Stores_Revenues <- Laptop_Sales_Data[,.(Volumes=.N, Revenues=sum(Retail.Price, na.rm = TRUE), Configuration), by= Store.Postcode]

# Plot one point per store: the repeated totals are de-duplicated first,
# otherwise every sale row would be drawn as an overlapping point.
# Sales_Stores_Revenues itself is left untouched. The sale dates in the data
# are from 2008, so the subtitle says 2008 rather than 2018.
ggplotly(
  ggplot(
    unique(Sales_Stores_Revenues[, .(Store.Postcode, Volumes, Revenues)]),
    aes(x = Volumes, y = Revenues, colour = Store.Postcode)
  ) +
    geom_point(shape = "circle", size = 2L) +
    scale_color_hue(direction = 1) +
    labs(
      x = "Volumes",
      y = "Revenues",
      title = "Scatterplot Of Sales Volumes VS Sales Revenues ",
      subtitle = "In 2008",
      color = "Stores"
    ) +
    theme_classic()
)

ii.

# Add revenue per sale (Revenues / Volumes) to Sales_Stores_Revenues by
# reference, then bind the second name to that same table (no copy is made).
set(
  Sales_Stores_Revenues,
  j = "ratio_volume_revenue",
  value = Sales_Stores_Revenues[["Revenues"]] / Sales_Stores_Revenues[["Volumes"]]
)
Sales_Stores_Revenues_Configuration_Ratio <- Sales_Stores_Revenues

d. Configuration Questions

i.

# Price together with the numeric hardware attributes; rows with any NA
# are dropped.
Detail_Price <- na.omit(
  Laptop_Sales_Data[
    ,
    c(
      "Retail.Price",
      "Screen.Size..Inches.",
      "Battery.Life..Hours.",
      "RAM..GB.",
      "Processor.Speeds..GHz.",
      "HD.Size..GB."
    ),
    with = FALSE
  ]
)

ii.

# Store postcode with the wireless / bundled-applications flags and the
# configuration id; rows with any NA are dropped.
Stores_Details <- na.omit(
  Laptop_Sales_Data[
    ,
    c(
      "Store.Postcode",
      "Integrated.Wireless.",
      "Bundled.Applications.",
      "Configuration"
    ),
    with = FALSE
  ]
)

# Stacked count of sales per store, split by the bundled-applications flag.
# The sale dates in the data are from 2008, so the subtitle says 2008 rather
# than 2018.
ggplotly(
  ggplot(Stores_Details, aes(x = Store.Postcode, fill = Bundled.Applications.)) +
    geom_bar() +
    scale_fill_brewer(palette = "Greens", direction = 1) +
    labs(
      x = "Stores",
      y = "Count",
      title = "Stores Bundled Applications Count",
      subtitle = "In 2008",
      fill = "Bundled Application"
    ) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
# Stacked count of sales per store, split by the integrated-wireless flag.
# The sale dates in the data are from 2008, so the subtitle says 2008 rather
# than 2018.
ggplotly(
  ggplot(Stores_Details, aes(x = Store.Postcode, fill = Integrated.Wireless.)) +
    geom_bar() +
    scale_fill_brewer(palette = "Greens", direction = 1) +
    labs(
      x = "Stores",
      y = "Count",
      title = "Stores Integrated Wireless Count",
      subtitle = "In 2008",
      fill = "Integrated Wireless"
    ) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)